{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'3.6.7'"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from platform import python_version\n",
    "\n",
    "python_version()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('0.25.1', '3.0.3')"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.__version__,matplotlib.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "0   bar                       Sed mollis tempor accumsan.  2010\n",
       "1   bar                       Sed mollis tempor accumsan.  2010\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "3   baz                           Aenean eu aliquam nunc.  2005\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2011\n",
       "5   foo                       Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'title': ['bar','bar','baz','baz','foo','foo'],\n",
    "    'contents':[\n",
    "        'Sed mollis tempor accumsan.',\n",
    "        'Sed mollis tempor accumsan.',\n",
    "        'Nullam et feugiat turpis, non condimentum dolor.',\n",
    "        'Aenean eu aliquam nunc.',\n",
    "        'Lorem ipsum dolor sit amet.',\n",
    "        'Lorem ipsum dolor sit amet.'\n",
    "    ],\n",
    "    'year':[2010,2010,2005,2005,2011,2011]\n",
    "})\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## show"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                     contents  year\n",
       "1   bar  Sed mollis tempor accumsan.  2010\n",
       "5   foo  Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated()]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## show including original"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                     contents  year\n",
       "0   bar  Sed mollis tempor accumsan.  2010\n",
       "1   bar  Sed mollis tempor accumsan.  2010\n",
       "4   foo  Lorem ipsum dolor sit amet.  2011\n",
       "5   foo  Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated(keep=False)]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(df[df.duplicated()])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## show, some columns only"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                     contents  year\n",
       "1   bar  Sed mollis tempor accumsan.  2010\n",
       "3   baz      Aenean eu aliquam nunc.  2005\n",
       "5   foo  Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df[df.duplicated(subset=['title','year'])]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## drop duplicates, keep original"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "0   bar                       Sed mollis tempor accumsan.  2010\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "3   baz                           Aenean eu aliquam nunc.  2005\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## drop duplicates based on some columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "0   bar                       Sed mollis tempor accumsan.  2010\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2011"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates(subset=['title','year'])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## drop columns that are or have duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "3   baz                           Aenean eu aliquam nunc.  2005"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.drop_duplicates(keep=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mark duplicates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "      <th>is_duplicate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2010</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2011</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year  is_duplicate\n",
       "0   bar                       Sed mollis tempor accumsan.  2010         False\n",
       "1   bar                       Sed mollis tempor accumsan.  2010          True\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005         False\n",
       "3   baz                           Aenean eu aliquam nunc.  2005         False\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2011         False\n",
       "5   foo                       Lorem ipsum dolor sit amet.  2011          True"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.assign(\n",
    "    is_duplicate=lambda d: d.duplicated()\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## custom keep logic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2009</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>1995</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "0   bar                       Sed mollis tempor accumsan.  2009\n",
       "1   bar                       Sed mollis tempor accumsan.  2019\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "3   baz                           Aenean eu aliquam nunc.  2005\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2015\n",
       "5   foo                       Lorem ipsum dolor sit amet.  1995"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame({\n",
    "    'title': ['bar','bar','baz','baz','foo','foo'],\n",
    "    'contents':[\n",
    "        'Sed mollis tempor accumsan.',\n",
    "        'Sed mollis tempor accumsan.',\n",
    "        'Nullam et feugiat turpis, non condimentum dolor.',\n",
    "        'Aenean eu aliquam nunc.',\n",
    "        'Lorem ipsum dolor sit amet.',\n",
    "        'Lorem ipsum dolor sit amet.'\n",
    "    ],\n",
    "    'year':[2009,2019,2005,2005,2015,1995]\n",
    "})\n",
    "\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe tex2jax_ignore\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>contents</th>\n",
       "      <th>year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>bar</td>\n",
       "      <td>Sed mollis tempor accumsan.</td>\n",
       "      <td>2019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>baz</td>\n",
       "      <td>Aenean eu aliquam nunc.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>baz</td>\n",
       "      <td>Nullam et feugiat turpis, non condimentum dolor.</td>\n",
       "      <td>2005</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>foo</td>\n",
       "      <td>Lorem ipsum dolor sit amet.</td>\n",
       "      <td>2015</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  title                                          contents  year\n",
       "1   bar                       Sed mollis tempor accumsan.  2019\n",
       "3   baz                           Aenean eu aliquam nunc.  2005\n",
       "2   baz  Nullam et feugiat turpis, non condimentum dolor.  2005\n",
       "4   foo                       Lorem ipsum dolor sit amet.  2015"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.sort_values(\n",
    "    ['title','contents','year']\n",
    ").drop_duplicates(\n",
    "    subset=['title','contents'],keep='last'\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.6",
   "language": "python",
   "name": "python36"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
